TEOS Hydrostation S Data Lab

Mileisha L. Velázquez

2023-03-20


Load required libraries

library(tidyverse)
library(gsw)

Import Data:

Names of columns are separated by commas and white spaces, meanwhile the observations within the rows are separated by only white spaces. This requires rearranging data so that the we may view both the data with the correct column names

hydrostation_bottle <- read_delim("hydrostation_bottle.txt", 
    delim = "\t", escape_double = FALSE, 
    trim_ws = TRUE, skip = 31)


hydrostation_bottle_names <- read_csv("hydrostation_bottle.txt", 
    skip = 30)

colnames(hydrostation_bottle)=colnames(hydrostation_bottle_names)

Variable Names and Units:

yyyymmdd = Year Month Day   
decy   = Decimal Year     
time   = Time (hhmm)      
latN   = Latitude (Deg N) 
lonW   = Longitude (Deg W)
Depth  = Depth (m)                  
Temp   = Temperature ITS-90 (C) 
Pres   = CTD Pressure (dbar)   
CTD_S  = CTD Salinity (PSS-78)      
Sal1   = Salinity-1 (PSS-78)        
Sig-th = Sigma-Theta (kg/m^3)       
O2(1)  = Oxygen-1 (umol/kg)          
OxFixT = Oxygen Fix Temp (C)        
Anom1  = Oxy Anomaly-1 (umol/kg)    
// Quality flags //
-999  = No data
 0 = Less than detection limit

Plots:

hydrostation_bottle %>% 
  filter(`Sig-th` !=-999 & Depth <20) %>% #filter out -999, no data flag and depth for better visual
  ggplot()+geom_line(aes(x=decy,y=`Sig-th`)) #use to visualize patter in data 

Figure 1. Clear seasonal signal is observed for sigma-theta

hydrostation_bottle %>% 
  filter(`Sig-th` !=-999 & Depth <20) %>% 
  #filter out -999, no data flag and depth for better visual
  ggplot()+geom_point(aes(x=Temp,y=`Sig-th`))#there are two outliers

Figure 2. Relationship between temperature and density. Both are strongly correlated

What to consider

  • Density data from 1988-present, but salinity data from 1950s-present

  • Calculate seawater density form 1950s - present

  • TEOS-10 usage for this procedure

TEOS-10 Toolbox in Packag seacarb

#gsw
#gsw_sigma0
 #requires SA (absolute salinity) and CT (conservative temperature)

#gsw_SA_from_SP
#sea pressure in (dbar), log, lat, practical sal
#Pressure plot missing points
hydrostation_bottle %>% 
  ggplot()+
  geom_point(aes(x=decy, y=Pres))

#Depth data for all time series data 
hydrostation_bottle %>% 
  ggplot()+
  geom_point(aes(x=decy, y=Depth))

Calculate Sea Pressure from Depth Using GSW Function

hydrostation_bottle=
  hydrostation_bottle %>% 
mutate(Pres_gsw=gsw_p_from_z(Depth*-1,latN))

#Check new variable in plot
hydrostation_bottle %>% 
  ggplot()+
#strong 1:1 agreement between measured pressure and calculated pressure
  geom_point(aes(x=Pres, y=Pres_gsw))

Calculate Absolue Salinity from Practical Salinity

hydrostation_bottle %>% 
  ggplot()+
  geom_point(aes(x=decy, y=Sal1))

hydrostation_bottle=
  hydrostation_bottle %>% 
mutate(Pres_gsw=gsw_p_from_z(Depth*-1,latN)) %>% 
  mutate(S_abs_gsw=gsw_SA_from_SP(Sal1,Pres_gsw, 360-lonW, latN))

#Check
hydrostation_bottle %>% 
  ggplot()+
  geom_point(aes(x=decy, y=S_abs_gsw))

#Another way to check 
hydrostation_bottle %>% 
  filter(Sal1!= -999) %>% 
  ggplot()+
  geom_point(aes(x=Sal1, y=S_abs_gsw))

Calculate Conservative Temperature

#gsw_CT_from_t
#absolute salinity , in situ temp (ITS-90), & sea pressure 

#Add line to calculate CT 
hydrostation_bottle=
  hydrostation_bottle %>% 
  filter(Sal1!=-999) %>%
mutate(Pres_gsw=gsw_p_from_z(Depth*-1,latN)) %>% 
  mutate(S_abs_gsw=gsw_SA_from_SP(Sal1,Pres_gsw, 360-lonW, latN)) %>% 
  mutate(T_cons_gsw=gsw_CT_from_t(S_abs_gsw, Temp, Pres_gsw))
   
#Check data
  hydrostation_bottle %>%
  filter(Sal1!=-999) %>% 
  ggplot()+
  geom_point(aes(x=Temp, y=T_cons_gsw))

#Still missing data 
  hydrostation_bottle=
  hydrostation_bottle %>% 
  filter(Sal1!=-999) %>%
    filter(Temp!=-999) %>% 
mutate(Pres_gsw=gsw_p_from_z(Depth*-1,latN)) %>% 
  mutate(S_abs_gsw=gsw_SA_from_SP(Sal1,Pres_gsw, 360-lonW, latN)) %>% 
  mutate(T_cons_gsw=gsw_CT_from_t(S_abs_gsw, Temp, Pres_gsw))
  
  #Check again
  hydrostation_bottle %>%
  filter(Sal1!=-999) %>%
   filter(Temp!=-999) %>% 
  ggplot()+
  geom_point(aes(x=Temp, y=T_cons_gsw))

Finally Calculate Sigma-theta: requires SA (absolute salinity) and CT (conservative temperature)

#FOR HW 
  HydroS=
  hydrostation_bottle=
  hydrostation_bottle %>% 
  filter(Sal1!=-999) %>%
    filter(Temp!=-999) %>% 
  #calculate pressure
mutate(Pres_gsw=gsw_p_from_z(Depth*-1,latN)) %>% 
  #calc. absolute salinity
  mutate(S_abs_gsw=gsw_SA_from_SP(Sal1,Pres_gsw, 360-lonW, latN)) %>% 
  #calc.conservative temperature
  mutate(T_cons_gsw=gsw_CT_from_t(S_abs_gsw, Temp, Pres_gsw)) %>% 
  #calc. sigma-theta
  mutate(Sig_th_gsw=gsw_sigma0(S_abs_gsw,T_cons_gsw))

HydroS %>% 
  filter(`Sig-th`!=-999) %>% 
  ggplot()+
  geom_point(aes(x=`Sig-th`, y=Sig_th_gsw))

Identify and Fix Error:

HydroS %>% #Low sig-th-gsw point
  filter(Sig_th_gsw<0)
## # A tibble: 1 × 19
##         Id yyyymmd  decy  time  latN  lonW Depth  Pres  Temp CTD_S  Sal1 Sig-t…¹
##      <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1   6.13e9  2.02e7 2019.  1730  32.2  64.5   3.5   3.6  28.9  36.6  1.15    23.3
## # … with 7 more variables: `O2(1)` <dbl>, OxFix <dbl>, Anom1 <dbl>,
## #   Pres_gsw <dbl>, S_abs_gsw <dbl>, T_cons_gsw <dbl>, Sig_th_gsw <dbl>, and
## #   abbreviated variable name ¹​`Sig-th`
 #Sal1 too low, caused low sig-th-gsw calculation 

#Calculate Sig-th-gsw with CTD_S instead of Sal1 for outlier point 
HydroS_correctedS_a=  
HydroS %>% 
filter(Sig_th_gsw<0) %>% 
  mutate(S_abs_gsw=gsw_SA_from_SP(CTD_S,Pres_gsw, 360-lonW, latN)) %>% 
  mutate(T_cons_gsw=gsw_CT_from_t(S_abs_gsw, Temp, Pres_gsw)) %>% 
  mutate(Sig_th_gsw=gsw_sigma0(S_abs_gsw,T_cons_gsw))
  
HydroS_correctedS_b=  
HydroS %>% 
filter(Sig_th_gsw>0)

HydroS_corrected = rbind(HydroS_correctedS_a, HydroS_correctedS_b)
HydroS_corrected %>% 
  filter(`Sig-th`!=-999) %>% 
  ggplot()+
  geom_point(aes(x=`Sig-th`, y=Sig_th_gsw))

HydroS_corrected %>% 
  
  ggplot()+

  geom_point(aes(x=Sig_th_gsw,y=Depth))+

  scale_y_reverse()+

  scale_x_continuous(position="top")+

  xlab(expression(paste(sigma[theta]," (kg m"^"-3",")")))+

  ylab("Depth (m)")+

  theme_classic()

Linear Models - Has surface sigma theta decreased over time?

HydroS_shallow = HydroS_corrected %>% 

                 filter(Depth<30)#shallow 

?lm #linear models

#lm(y~x,data=data)

lm(Sig_th_gsw~decy,data=HydroS_shallow)
## 
## Call:
## lm(formula = Sig_th_gsw ~ decy, data = HydroS_shallow)
## 
## Coefficients:
## (Intercept)         decy  
##   33.404723    -0.004238
#lm(formula, data)
#response~terms -> Y~X

#y=mx+b

#y = Sig_th_gsw

#x = decy

#coefficients: intercept = b, decy = m

#Sig_th_gsw = -0.004*decy + 33.4

#(kg/m^3) = (kg/m^3/y)*y + (kg/m^3)

sig_theta_time_model=lm(Sig_th_gsw~decy,data=HydroS_shallow)

summary(sig_theta_time_model)
## 
## Call:
## lm(formula = Sig_th_gsw ~ decy, data = HydroS_shallow)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5789 -0.8683  0.1070  0.8819  1.5805 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 33.4047230  1.6704055  19.998  < 2e-16 ***
## decy        -0.0042378  0.0008387  -5.053 4.59e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9401 on 3410 degrees of freedom
## Multiple R-squared:  0.007431,   Adjusted R-squared:  0.00714 
## F-statistic: 25.53 on 1 and 3410 DF,  p-value: 4.587e-07

Plotly - Interactive Plot for HydroS_shallow data

library(plotly)

plot=HydroS_shallow %>% 

  ggplot(aes(x=decy,y=Sig_th_gsw))+

  geom_point()+

  geom_line()+

  geom_smooth(method="lm")+
#aids in overplotting visualization 
  
  theme_classic()

ggplotly(plot)

Sigma theta values do change over time. The regression line slightly decreases from 1960 to 2020

Seanonal Variability of Sigma-Theta Values Plot

Winter months in Bermuda: December - March, while summer months are from August through October.

HydroS_seasons=
  HydroS_corrected %>% 
  mutate(month=as.numeric(substr(yyyymmd, 5, 6 ))) %>% 
mutate(season=ifelse(month==12|| month==1| month==2| month==3, 'winter',
                               ifelse(month==8|month==9|month==10,'summer','')))

summary(lm(`O2(1)`~season,data=HydroS_seasons))
## 
## Call:
## lm(formula = `O2(1)` ~ season, data = HydroS_seasons)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1184.03    10.63    30.03    61.68   143.52 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   183.965      1.520 121.030  < 2e-16 ***
## seasonsummer    1.064      2.754   0.386    0.699    
## seasonwinter  -27.584      3.435  -8.030 1.01e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 202 on 29693 degrees of freedom
## Multiple R-squared:  0.002363,   Adjusted R-squared:  0.002296 
## F-statistic: 35.17 on 2 and 29693 DF,  p-value: 5.543e-16

Questions:

  • Is dissolved O2 higher in summer than winter?
  • Is salinity higher in summer than winter?
  • Is sound speed higher?

Lab Assignment

  1. Pick a question (include hypothesis)
  • Q: How does temperature and salinity co-vary over time?
  • H: As time passes, both temperature and salinity parameters will experience variability.
  1. Produce a plot and a statistical summary using lm()
  2. Describe your results, the summary, and answer the question
  3. Compile into a completed lab report using R Markdown

Q: How does temperature and salinity co-vary over time?

Temperature variability over time

Code:

HydroS_shallow=
  HydroS_corrected %>% 
  filter(Depth<50) 

HydroS_shallow %>% 
  ggplot(aes(x=decy, y=T_cons_gsw))+
  geom_line()+
  geom_smooth(method=lm, color = "red")+
  xlab('Year')+
  ylab('Temperature (C)')+
theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

Applied linear model regression

lm(T_cons_gsw~decy,data=HydroS_shallow)
## 
## Call:
## lm(formula = T_cons_gsw ~ decy, data = HydroS_shallow)
## 
## Coefficients:
## (Intercept)         decy  
##    -11.5656       0.0174
temp_time_model=lm(T_cons_gsw~decy,data=HydroS_shallow)

summary(temp_time_model)
## 
## Call:
## lm(formula = T_cons_gsw ~ decy, data = HydroS_shallow)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.2932 -2.6696 -0.3533  2.6582  6.3696 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -11.565567   4.918529  -2.351   0.0188 *  
## decy          0.017404   0.002469   7.049 2.12e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.926 on 3847 degrees of freedom
## Multiple R-squared:  0.01275,    Adjusted R-squared:  0.0125 
## F-statistic: 49.69 on 1 and 3847 DF,  p-value: 2.123e-12

Salinity variability over time

Code:

HydroS_shallow %>% 
  ggplot(aes(x=decy, y=S_abs_gsw))+
  geom_line()+
   geom_smooth(method=lm, color = "red")+
  xlab('Year')+
  ylab('Salinity')+
theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

Applied linear model regression

sal_time_model=lm(S_abs_gsw~decy,data=HydroS_shallow)
summary(sal_time_model)
## 
## Call:
## lm(formula = S_abs_gsw ~ decy, data = HydroS_shallow)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.05036 -0.07784  0.02251  0.10302  0.73828 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.149e+01  2.752e-01  114.43   <2e-16 ***
## decy        2.622e-03  1.381e-04   18.99   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1637 on 3847 degrees of freedom
## Multiple R-squared:  0.08568,    Adjusted R-squared:  0.08544 
## F-statistic: 360.5 on 1 and 3847 DF,  p-value: < 2.2e-16

Discussion - Temperature and Salinity Variability Over Time

Overall, both measured variables of temperature and salinity collected from Hydrostation S increase in shallow waters above 50 meters over time (1960 - 2020). To observe the relationship of temperature vs time and salinity vs time, the data required a linear model transformation. For this, the lm function R was applied. As a result, residual analysis and statistical summaries were generated for establishing a linear relationship of the plotted variables. However, there are observed differences in the slope and therefore the overall relationship between the studied parameters. The regression line exerted for the temperature variability in shallow depths is steeper than the line denoted for salinity variability. Consequently, we can debate that temperature has a stronger relationship and therefore presents greater change over time.